Data import, cleaning, and exploration

# Load the saved tweets object from disk.
cf <- readRDS("data/campfire-tweets-2020-04-17.Rds")

# Keep only the rows whose screen name contains one of the listed handles
# (case-sensitive regular-expression match).
Sources <- filter(
  cf,
  str_detect(
    screen_name,
    "CALFIRE_ButteCo|Cal_Fire|ButteSheriff|ChicoPolice|ChicoFD|CountyOfButte|Paradise_CA"
  )
)


# Keep the 29 rows of Sources with the smallest created_at_pst values
# (a negative n selects from the bottom; ties are retained).
no_outliers <- Sources %>%
  top_n(-29, created_at_pst)

# Earliest created_at_pst per account among the retained rows.
summarize(
  group_by(no_outliers, screen_name),
  min(created_at_pst)
)
## # A tibble: 3 x 2
##   screen_name     `min(created_at_pst)`
##   <chr>           <dttm>               
## 1 ButteSheriff    2018-11-08 08:03:55  
## 2 CALFIRE_ButteCo 2018-11-08 06:51:47  
## 3 ChicoFD         2018-11-08 07:46:17
# Store the account name as a factor.
no_outliers[["screen_name"]] <- as.factor(no_outliers[["screen_name"]])

# Stacked column chart of tweet counts by hour, filled by account. Rows are
# tallied per (hour, account, minute) and the pieces stack within each hour.
no_outliers %>%
  count(tweet_hour, screen_name, tweet_min, name = "tweet_count") %>%
  ggplot(aes(x = tweet_hour, y = tweet_count, fill = screen_name)) +
  geom_col()

# Earliest and latest created_at_pst across all rows of Sources.
range(Sources[["created_at_pst"]])
## [1] "2018-11-08 06:51:47 PST" "2018-12-19 13:46:14 PST"

# One histogram per engagement count, filled by account. Each plot keeps only
# the rows where that count is greater than 1.
plot.fav <- ggplot(
  filter(no_outliers, favorite_count > 1),
  aes(x = favorite_count, fill = screen_name)
) +
  geom_histogram()

plot.rt <- ggplot(
  filter(no_outliers, retweet_count > 1),
  aes(x = retweet_count, fill = screen_name)
) +
  geom_histogram()

plot.quo <- ggplot(
  filter(no_outliers, quote_count > 1),
  aes(x = quote_count, fill = screen_name)
) +
  geom_histogram()

plot.rply <- ggplot(
  filter(no_outliers, reply_count > 1),
  aes(x = reply_count, fill = screen_name)
) +
  geom_histogram()

# Arrange the four histograms in a grid with two rows.
gridExtra::grid.arrange(plot.fav, plot.rt, plot.quo, plot.rply, nrow = 2)

Second Plot Type

library(vistime)
library(plotly)

# Insert line breaks into the tweet text before it is handed to vistime.
# ". " becomes ".\n".
no_outliers$text <- gsub("(\\. )", "\\.\n", no_outliers$text)
# A leading "#" or "@" gets a newline in front of it. The "^" anchor means
# only a "#"/"@" at the very start of the text is affected.
no_outliers$text <- gsub("(^\\#)", "\n\\#", no_outliers$text)
no_outliers$text <- gsub("(^\\@)", "\n\\@", no_outliers$text)
# One or more consecutive ": " become a single ":\n".
no_outliers$text <- gsub("(\\: )+", "\\:\n", no_outliers$text)
# Every "http" gets a newline in front of it.
no_outliers$text <- gsub("(http)", "\nhttp", no_outliers$text)

# Timeline with the tweet text as events, grouped by account and positioned
# at created_at_pst.
# NOTE(review): the argument names used here (events, groups, start, color)
# depend on the installed vistime version -- confirm against its docs.
time <- vistime(
  no_outliers,
  events = "text",
  groups = "screen_name",
  start = "created_at_pst",
  show_labels = FALSE,
  color = "#0bc8e0"
)

# Build the plotly object so that layout options can be applied to it.
timeline <- plotly_build(time)

# Plot margins passed to plotly's layout().
m <- list(
  l = 50,
  r = 50,
  b = 100,
  t = 100,
  pad = 4
)

# Fixed-size layout. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned, whereas FALSE is a reserved word.
h <- timeline %>%
  layout(autosize = FALSE, width = 1100, height = 600, margin = m)
h

And another

library(timelineS)
#timelineS(no_outliers, main = "Life of Michael Jackson")

# One row per account: accounts whose screen name or description contains
# "news" or "News" and that are verified, ordered by follower count.
# `verified == "TRUE"` relies on R coercing the column to character for the
# comparison, so it works for a logical or a character column.
news_orgs <- cf %>%
  users_data() %>%
  distinct(screen_name, .keep_all = TRUE) %>%
  filter(
    str_detect(screen_name, "news|News") |
      str_detect(description, "news|News")
  ) %>%
  filter(verified == "TRUE") %>%
  arrange(desc(followers_count))

# Split the tweets on the single account list above. Using semi_join() and
# anti_join() against the same table guarantees that every tweet lands in
# exactly one of the two sets; filtering the tweets separately could drop or
# duplicate tweets when an account's description differs between rows.
news <- semi_join(x = cf, y = news_orgs, by = "screen_name")
news$user_type <- "news"
public <- anti_join(x = cf, y = news_orgs, by = "screen_name")
public$user_type <- "public"

# Recombine with the new user_type column (public rows first, then news).
cf <- bind_rows(public, news)


# The 20 news accounts with the most tweets.
top.20.users <- news %>%
  count(screen_name, sort = TRUE) %>%
  slice(1:20)

# Horizontal bar chart of tweet counts for those accounts.
ggplot(top.20.users, aes(x = reorder(screen_name, -n), y = n)) +
  geom_col(fill = "darkslategray") +
  theme_minimal() +
  coord_flip() +
  xlab("Users") +
  ylab("Count")

Applying Sentiments

We are interested in how sentiment differs between users who are considered news outlets and the general public. We plan to look more closely at individual public officials, such as the Sheriff, when we go through this more thoroughly in our project.

# Attach AFINN scores to the tokenised tweet words; words without a lexicon
# entry are dropped by the inner join.
ts1 <- inner_join(tweet_words_nostop, get_sentiments("afinn"))

# Sum the word scores within each tweet.
ts2 <- summarize(group_by(ts1, status_id), sentiment = sum(value))

# Add the per-tweet score to Sources; tweets with no scored words get NA.
cf2 <- left_join(Sources, ts2, by = "status_id")

# Density of tweet sentiment, one curve per account.
ggplot(cf2, aes(x = sentiment, col = screen_name)) +
  geom_density(lwd = 2) +
  theme_minimal()